# coding=UTF-8

'''
Created on 16 Mar 2012

@author: R
'''

from threading import Thread
from Queue import Queue
from sys import argv
from base import processpage
import re

# Both positional arguments are read unconditionally below, so stop with the
# usage text here instead of failing later with an IndexError.
if len(argv) < 3:
    print("usage: python listURLs [output_file] [start_page]")
    raise SystemExit(1)

# Pieces of the listing URL; getlist() concatenates them as
#   urlpre + urlcity + urlestate + <page number> + urltail
urlpre = "http://www.soufun.com/house/"
urltail = "_.htm"
urlcity = "%B1%B1%BE%A9"  # percent-encoded city name: Beijing
# Percent-encoded property-type filter. Known values:
#   %D7%A1%D5%AC         residential
#   %B1%F0%CA%FB         villa
#   %D0%B4%D7%D6%C2%A5   office building
urlestate = "______%D7%A1%D5%AC___________"

# The start page is used as a range() bound further down, so it has to be an
# integer rather than the raw command-line string.  The enqueue loop at the
# bottom of the file requests pages startpage+1 .. endpage.
try:
    startpage = int(argv[2])
except ValueError:
    print("start_page must be an integer, got %r" % (argv[2],))
    print("usage: python listURLs [output_file] [start_page]")
    raise SystemExit(1)
endpage = 979

# Opened in append mode, so an existing output file is extended rather than
# truncated.
fh_putlist = open(argv[1], "a")

q = Queue()      # page numbers waiting to be fetched by the worker threads
MultiNumber = 2  # number of worker threads


# Captures (link ending in "soufun.com/", anchor text) for each matching
# anchor.  Raw string so the pattern contains no stray string escapes, and the
# dot in "soufun.com" is escaped so it only matches a literal dot.
_ESTATE_LINK_RE = re.compile(r'(http://.*?soufun\.com/)">(.*?)</a')


def getlist(page_num):
    """Fetch one listing page and append its estate links to the output file.

    The page URL is built from the module-level URL pieces and ``page_num``,
    fetched through ``processpage`` (presumably returning the page text --
    confirm against ``base``), and every match of the estate-link pattern is
    written to ``fh_putlist`` as a ``name,url`` line.

    :param page_num: page number to insert into the listing URL.
    """
    url = urlpre + urlcity + urlestate + str(page_num) + urltail
    pagetext = processpage(url)
    matches = _ESTATE_LINK_RE.findall(pagetext)
    # One write per page instead of one per match: fewer small writes on a
    # file handle that several worker threads share.
    fh_putlist.write("".join(name + "," + link + "\n" for link, name in matches))
    # Progress indicator; the parenthesised form matches the print call used
    # for the usage message at the top of the file.
    print(page_num)
        
def working():
    """Worker loop: take page numbers from ``q`` and process them forever.

    Each page number is passed to ``getlist``.  A failure on one page is
    reported and the loop carries on with the next page.  ``q.task_done()`` is
    always called, because otherwise a single exception would kill the worker
    without acknowledging the task and ``q.join()`` in the main thread would
    never return.
    """
    while True:
        pagenum = q.get()
        try:
            getlist(pagenum)
        except Exception as exc:
            # Keep the worker alive; the page number is printed so the failed
            # page can be retried by hand.
            print("page %s failed: %s" % (pagenum, exc))
        finally:
            q.task_done()

# Start the worker threads.  They are daemon threads because working() never
# returns; the process exits once the main thread is done.
for _ in range(MultiNumber):
    t = Thread(target=working)
    t.daemon = True
    t.start()

# startpage may still be the raw command-line string, and range() needs an
# integer.  Pages startpage+1 .. endpage are queued.
for i in range(int(startpage), endpage):
    q.put(i + 1)

# Wait until every queued page has been acknowledged, then close the output
# file explicitly so buffered lines are flushed before the daemon threads are
# torn down at interpreter exit.
try:
    q.join()
finally:
    fh_putlist.close()
